Tuberculosis (TB) continues to increase worldwide despite vigorous attempts to control it. Bacillus Calmette-Guerin (BCG) is the only licensed vaccine currently available for protection against TB, however, its efficacy is highly variable between countries. It has been hypothesized that BCG’s variable protection is due, among others, to immunological interference by environmental, non-tuberculous mycobacteria (NTM). However, a definitive mechanism has not been identified so far. Considering the foregoing, We developed a murine model closely resembling the natural history of human exposure to different mycobacterial species, including: 1) BCG vaccination at an early age; 2) exposure to viable NTMs (Mycobacterium avium subsp. avium) via the oral route and 3) maintaining continuous NTM exposure even after TB infection, as occurs in endemic places.
The presented data has six groups:
We have performed 10x spatial visium transciptomics on formaldehyde-fixed paraffin embedded (FFPE) lung tissues at day 120 post Mtb infection. We have seen lymphoid follicles in lungs of mice vaccinated with BCG, exposed with high concentration of NTM and challenged with Mtb. These lymphoid follicles are correlated with decreased Mtb bacterial burden in the lungs and also with increased B cells and anti-Mtb cell lysate IgA aand IgG antibodies. Therefore, to further evaluate the protection mechanisms, we have performed spatial transcriptomics o lung tissues.
#remove.packages("Seurat")
#install.packages('remotes')
#remotes::install_version("Seurat", version = "4.0.5")
#BiocManager::install("clusterProfiler")
#install.packages("enrichplot")
#installed.packages("patchwork")
#BiocManager::install("SingleR")
#BiocManager::install("celldex")
#BiocManager::install("ensembldb")
#BiocManager::install('limma')
#BiocManager::install("DESeq2")
#organism = "org.Mm.eg.db"
#BiocManager::install(organism)
#BiocManager::install("pathview")
#install.packages("Seurat")
#install.packages("remotes")
#BiocManager::install("rhdf5")
#install.packages("hdf5r")
#BiocManager::install("NeuCA")
#install.packages('devtools')
#install_github("ggjlab/scMCA")
#install.packages("pheatmap")
#BiocManager::install("scater")
#devtools::install_github("sqjin/CellChat")
#BiocManager::install("ComplexHeatmap")
In this step, we are loading the libraries which are required for analysis and plotting of the data.
library(Seurat)
library(devtools)
library(ggplot2)
library(patchwork)
library(dplyr)
library(clusterProfiler)
library(enrichplot)
# we use ggplot2 to add x axis labels (ex: ridgeplot)
library(stringr)
library(celldex)
library(ensembldb)
library(SingleR)
library(DESeq2)
library(org.Mm.eg.db)
library(rhdf5)
library(hdf5r)
library(pheatmap)
library(ComplexHeatmap)
library(CellChat)
library(patchwork)
options(stringsAsFactors = FALSE)
library(NMF)
library(ggalluvial)
# Mouse 1
ntm_bcg_mouse_1 <- "/Volumes/Seagate_4TB/spatial_ntm_samples/demultiplexed/ntm_bcg_sample_round_one/outs/"
list.files(ntm_bcg_mouse_1)
## [1] "analysis"
## [2] "cloupe.cloupe"
## [3] "filtered_feature_bc_matrix_ntm_bcg_1"
## [4] "filtered_feature_bc_matrix.h5"
## [5] "metrics_summary.csv"
## [6] "molecule_info.h5"
## [7] "possorted_genome_bam.bam"
## [8] "possorted_genome_bam.bam.bai"
## [9] "probe_set.csv"
## [10] "raw_feature_bc_matrix"
## [11] "raw_feature_bc_matrix.h5"
## [12] "spatial"
## [13] "spatial_enrichment.csv"
## [14] "web_summary.html"
ntm_bcg_mouse_1_data <- Load10X_Spatial(
ntm_bcg_mouse_1,
filename = "filtered_feature_bc_matrix.h5",
assay = "spatial",
slice = "ntmbcg",
filter.matrix = TRUE,
to.upper = FALSE,
image = NULL)
# Mouse 2
ntm_bcg_mouse_2 <- "/Volumes/Seagate_4TB/spatial_ntm_samples/demultiplexed/ntm_bcg_sample_round_two/outs/"
list.files(ntm_bcg_mouse_2)
## [1] "analysis"
## [2] "cloupe.cloupe"
## [3] "filtered_feature_bc_matrix_ntm_bcg_2"
## [4] "filtered_feature_bc_matrix.h5"
## [5] "metrics_summary.csv"
## [6] "molecule_info.h5"
## [7] "ntm+bcg_web_summary.html"
## [8] "possorted_genome_bam.bam"
## [9] "possorted_genome_bam.bam.bai"
## [10] "probe_set.csv"
## [11] "raw_feature_bc_matrix"
## [12] "raw_feature_bc_matrix.h5"
## [13] "spatial"
## [14] "spatial_enrichment.csv"
ntm_bcg_mouse_2_data <- Load10X_Spatial(
ntm_bcg_mouse_2,
filename = "filtered_feature_bc_matrix.h5",
assay = "spatial",
slice = "ntmbcg",
filter.matrix = TRUE,
to.upper = FALSE,
image = NULL)
SCTransform function will NormalizeData, FindVariableFeatures, and ScaleData workflow. Results are saved in a new assay (named SCT by default) with counts being (corrected) counts, and data being log1p(counts).
# Mouse 1
ntm_bcg_mouse_1_data <- SCTransform(ntm_bcg_mouse_1_data, assay = "spatial", verbose = FALSE)
# Mouse 2
ntm_bcg_mouse_2_data <- SCTransform(ntm_bcg_mouse_2_data, assay = "spatial", verbose = FALSE)
p1 <- SpatialFeaturePlot(ntm_bcg_mouse_1_data, features = "Cd19", alpha = c(0.1, 1.5), pt.size.factor = 1.8)
p2 <- SpatialFeaturePlot(ntm_bcg_mouse_1_data, features = "Igha", alpha = c(0.1, 1), pt.size.factor = 1.8)
p3 <- SpatialFeaturePlot(ntm_bcg_mouse_1_data, features = "Bcl6", alpha = c(0.1, 1), pt.size.factor = 1.8)
p4 <- SpatialFeaturePlot(ntm_bcg_mouse_1_data, features = "Lta", alpha = c(0.1, 1.5), pt.size.factor = 1.8)
p5 <- SpatialFeaturePlot(ntm_bcg_mouse_1_data, features = "Ltb", alpha = c(0.1, 1.5), pt.size.factor = 1.8)
p6 <- SpatialFeaturePlot(ntm_bcg_mouse_1_data, features = "Cd4", alpha = c(0.1, 1.5), pt.size.factor = 1.8)
p7 <- SpatialFeaturePlot(ntm_bcg_mouse_1_data, features = "Mki67", alpha = c(0.1, 1.5), pt.size.factor = 1.8)
p8 <- SpatialFeaturePlot(ntm_bcg_mouse_1_data, features = "Cxcr5", alpha = c(0.1, 1.5), pt.size.factor = 1.8)
p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8
p1 <- SpatialFeaturePlot(ntm_bcg_mouse_2_data, features = "Cd19", alpha = c(0.1, 1.5), pt.size.factor = 1.5)
p2 <- SpatialFeaturePlot(ntm_bcg_mouse_2_data, features = "Igha", alpha = c(0.1, 1), pt.size.factor = 1.5)
p3 <- SpatialFeaturePlot(ntm_bcg_mouse_2_data, features = "Bcl6", alpha = c(0.1, 1), pt.size.factor = 1.5)
p4 <- SpatialFeaturePlot(ntm_bcg_mouse_2_data, features = "Lta", alpha = c(0.1, 1.5), pt.size.factor = 1.5)
p5 <- SpatialFeaturePlot(ntm_bcg_mouse_2_data, features = "Ltb", alpha = c(0.1, 1.5), pt.size.factor = 1.5)
p6 <- SpatialFeaturePlot(ntm_bcg_mouse_2_data, features = "Cd4", alpha = c(0.1, 1.5), pt.size.factor = 1.5)
p7 <- SpatialFeaturePlot(ntm_bcg_mouse_2_data, features = "Mki67", alpha = c(0.1, 1.5), pt.size.factor = 1.5)
p8 <- SpatialFeaturePlot(ntm_bcg_mouse_2_data, features = "Cxcr5", alpha = c(0.1, 1.5), pt.size.factor = 1.5)
p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8
Clustering is the assignment of objects to homogeneous groups (called clusters) while making sure that objects in different groups are not similar. Clustering is considered an unsupervised task as it aims to describe the hidden structure of the objects.
Dimensionality reduction is a process to reduce the number of features under consideration, where each feature is a dimension that partly represents the objects. Why is dimensionality reduction important? As more features are added, the data becomes very sparse and analysis suffers from the curse of dimensionality. Additionally, it is easier to process smaller data sets. Dimensionality reduction can be executed using two different methods:
# Mouse 1
ntm_bcg_mouse_1_analyzed <- RunPCA(ntm_bcg_mouse_1_data, assay = "SCT", verbose = FALSE)
ntm_bcg_mouse_1_analyzed <- FindNeighbors(ntm_bcg_mouse_1_analyzed, reduction = "pca", dims = 1:5)
ntm_bcg_mouse_1_analyzed <- FindClusters(ntm_bcg_mouse_1_analyzed, verbose = FALSE, resolution = 0.8)
ntm_bcg_mouse_1_analyzed <- RunUMAP(ntm_bcg_mouse_1_analyzed, reduction = "pca", dims = 1:5)
# Mouse 2
ntm_bcg_mouse_2_analyzed <- RunPCA(ntm_bcg_mouse_2_data, assay = "SCT", verbose = FALSE)
ntm_bcg_mouse_2_analyzed <- FindNeighbors(ntm_bcg_mouse_2_analyzed, reduction = "pca", dims = 1:5)
ntm_bcg_mouse_2_analyzed <- FindClusters(ntm_bcg_mouse_2_analyzed, verbose = FALSE, resolution = 0.8)
ntm_bcg_mouse_2_analyzed <- RunUMAP(ntm_bcg_mouse_2_analyzed, reduction = "pca", dims = 1:5)
# First convert the seurat object to single cell experiment object (otherwise the SingleR pipeline will not work)
# Mouse 1
ntm_bcg_mouse_1_analyzed.sce <- as.SingleCellExperiment(ntm_bcg_mouse_1_analyzed)
# Mouse 2
ntm_bcg_mouse_2_analyzed.sce <- as.SingleCellExperiment(ntm_bcg_mouse_2_analyzed)
#create reference data
ref.data <- celldex::ImmGenData()
ref.data
## class: SummarizedExperiment
## dim: 22134 830
## metadata(0):
## assays(1): logcounts
## rownames(22134): Zglp1 Vmn2r65 ... Tiparp Kdm1a
## rowData names(0):
## colnames(830):
## GSM1136119_EA07068_260297_MOGENE-1_0-ST-V1_MF.11C-11B+.LU_1.CEL
## GSM1136120_EA07068_260298_MOGENE-1_0-ST-V1_MF.11C-11B+.LU_2.CEL ...
## GSM920654_EA07068_201214_MOGENE-1_0-ST-V1_TGD.VG4+24ALO.E17.TH_1.CEL
## GSM920655_EA07068_201215_MOGENE-1_0-ST-V1_TGD.VG4+24ALO.E17.TH_2.CEL
## colData names(3): label.main label.fine label.ont
# run singleR pipeline to find the cell types
## Mouse 1
cell_types_mouse_1 <- SingleR(test=ntm_bcg_mouse_1_analyzed.sce, assay.type.test=1,
ref=ref.data, labels=ref.data$label.main)
## Mouse 2
cell_types_mouse_2 <- SingleR(test=ntm_bcg_mouse_2_analyzed.sce, assay.type.test=1,
ref=ref.data, labels=ref.data$label.main)
# Mouse 1
ntm_bcg_mouse_1_analyzed[["ref.data"]] <- cell_types_mouse_1$labels
# Mouse 2
ntm_bcg_mouse_2_analyzed[["ref.data"]] <- cell_types_mouse_2$labels
my_cols <- c('B cells'='#F8766D','DC'='#39568CFF','Endothelial cells'='#CD9600','Epithelial cells'='#00C19A','Fibroblasts'= "#C77CFF",
'Macrophages'='#00A9FF','Monocytes'='#0CB702','Stromal cells'='#FF61CC')
p1 <- DimPlot(ntm_bcg_mouse_1_analyzed, group.by = c("ref.data"), reduction = "umap",label = F, pt.size = 1.5, label.size = 5, cols = my_cols)
p2 <- SpatialDimPlot(ntm_bcg_mouse_1_analyzed, group.by = "ref.data", label = FALSE, label.size = 5, cols = my_cols)
p1 + p2
p1 <- DimPlot(ntm_bcg_mouse_2_analyzed, group.by = c("ref.data"), reduction = "umap",label = F, pt.size = 1.5, label.size = 5, cols = my_cols)
p2 <- SpatialDimPlot(ntm_bcg_mouse_2_analyzed, group.by = c("ref.data"), label = FALSE, label.size = 5, cols = my_cols)
p1 + p2
VlnPlot(ntm_bcg_mouse_1_analyzed, group.by = c("ref.data"), features = c("Cxcr5", "Mki67", "Ltb", "Cd38", "Ccr6", "Tnfrsf13c", "Fcrla"), cols = my_cols)
VlnPlot(ntm_bcg_mouse_2_analyzed, group.by = c("ref.data"), features = c("Cxcr5", "Mki67", "Ltb", "Cd38", "Ccr6", "Tnfrsf13c", "Fcrla"), cols = my_cols)
B_cell_mouse_1 <- FindMarkers(ntm_bcg_mouse_1_analyzed, group.by = "ref.data", ident.1 = "B cells", min.pct = 0.25)
head(B_cell_mouse_1, n = 10)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Spib 1.884647e-23 0.9418102 0.871 0.188 3.071410e-19
## Fcrla 9.367105e-22 0.7372041 0.710 0.127 1.526557e-17
## Ms4a1 2.140325e-20 0.9122884 0.710 0.143 3.488088e-16
## Blk 3.797112e-20 0.5455946 0.452 0.056 6.188154e-16
## Cd19 8.972829e-20 1.4938727 0.903 0.310 1.462302e-15
## Ighd 2.212679e-19 1.0746787 0.968 0.343 3.606004e-15
## Cxcr5 1.248954e-17 0.6716025 0.581 0.105 2.035420e-13
## Cd79b 3.723329e-17 1.2436082 1.000 0.462 6.067909e-13
## Cd52 5.006688e-17 1.1761909 1.000 0.925 8.159399e-13
## H2-DMb2 7.077493e-17 1.3906987 1.000 0.959 1.153419e-12
write.csv(B_cell_mouse_1, "DATA/bcg_ntm/B_cell_mouse_1.csv")
DC_mouse_1 <- FindMarkers(ntm_bcg_mouse_1_analyzed, group.by = "ref.data", ident.1 = "DC", min.pct = 0.25)
head(DC_mouse_1, n = 10)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Tmem132e 9.059847e-86 0.3850616 0.308 0.001 1.476483e-81
## Slc1a2 1.817320e-10 0.4710357 0.462 0.054 2.961686e-06
## Timd4 3.649406e-10 0.6152564 0.615 0.096 5.947437e-06
## Sytl4 2.704997e-09 0.4086301 0.385 0.043 4.408334e-05
## Fcho1 3.276719e-09 0.5709341 0.692 0.128 5.340069e-05
## Il12b 4.327837e-09 0.4568340 0.462 0.061 7.053076e-05
## 6030468B19Rik 8.465842e-09 0.3451095 0.308 0.029 1.379678e-04
## Afmid 3.004945e-08 0.4004987 0.385 0.048 4.897159e-04
## Rhoh 4.550822e-08 0.3967611 0.385 0.049 7.416475e-04
## Gpr132 6.617050e-08 0.5965298 0.692 0.149 1.078381e-03
write.csv(DC_mouse_1, "DATA/bcg_ntm/DC_mouse_1.csv")
Stromal_cells_mouse_1 <- FindMarkers(ntm_bcg_mouse_1_analyzed, group.by = "ref.data", ident.1 = "Stromal cells", min.pct = 0.25)
head(Stromal_cells_mouse_1, n = 10)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Des 4.845871e-19 0.6173361 0.860 0.749 7.897316e-15
## Car3 9.790880e-18 1.3014565 0.530 0.353 1.595620e-13
## Cox8b 4.075760e-16 0.5873284 0.258 0.123 6.642266e-12
## Ttn 2.775457e-15 0.9879534 0.358 0.211 4.523162e-11
## Acta1 2.926203e-15 0.7622334 0.261 0.129 4.768833e-11
## Myh11 4.216275e-15 0.2647998 0.485 0.302 6.871263e-11
## Tpm1 2.389877e-14 0.6391463 1.000 1.000 3.894783e-10
## Myom1 1.335642e-13 0.3498079 0.259 0.133 2.176695e-09
## Fabp4 3.689892e-13 1.2113353 0.515 0.372 6.013417e-09
## Tnnc1 7.578253e-13 0.9014321 0.347 0.214 1.235028e-08
write.csv(Stromal_cells_mouse_1, "DATA/bcg_ntm/Stromal_cells_mouse_1.csv")
Endothelial_cells_mouse_1 <- FindMarkers(ntm_bcg_mouse_1_analyzed, group.by = "ref.data", ident.1 = "Endothelial cells", min.pct = 0.25)
head(Endothelial_cells_mouse_1, n = 10)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Cldn5 2.907564e-94 0.6746717 0.999 0.945 4.738457e-90
## Igfbp2 1.828819e-82 0.6540357 1.000 0.948 2.980426e-78
## Acer2 8.520134e-81 0.5753817 0.997 0.947 1.388526e-76
## Tns1 1.568608e-78 0.5137977 1.000 0.991 2.556360e-74
## C3 1.460961e-77 -0.7289525 0.997 0.998 2.380928e-73
## Pakap.1 1.418332e-76 0.4983050 0.999 0.984 2.311456e-72
## C1qa 8.764174e-74 -0.7462537 0.904 0.973 1.428297e-69
## Egfl7 4.863529e-73 0.5618153 1.000 0.912 7.926094e-69
## Inmt 5.234591e-73 0.5960425 0.997 0.943 8.530813e-69
## Epas1 6.429208e-73 0.5337445 1.000 0.966 1.047768e-68
write.csv(Endothelial_cells_mouse_1, "DATA/bcg_ntm/Endothelial_cells_mouse_1.csv")
Epithelial_cell_mouse_1 <- FindMarkers(ntm_bcg_mouse_1_analyzed, group.by = "ref.data", ident.1 = "Epithelial cells", min.pct = 0.25)
head(Epithelial_cell_mouse_1, n = 10)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Cfap99 2.122884e-40 0.3433547 0.273 0.003 3.459665e-36
## Eno4 2.449205e-28 0.5165823 0.455 0.015 3.991469e-24
## Spef2 6.332139e-27 0.7402837 0.636 0.033 1.031949e-22
## Dnah6 3.985786e-26 0.6749811 0.545 0.025 6.495636e-22
## Efcab1 1.411071e-24 0.5140130 0.455 0.018 2.299623e-20
## Ccdc121 1.582202e-22 0.5223802 0.364 0.013 2.578515e-18
## Alox12e 1.497807e-21 0.5829836 0.545 0.030 2.440976e-17
## Ggt6 2.925302e-21 0.5095276 0.455 0.021 4.767365e-17
## Fam81b 3.546960e-21 0.7434482 0.545 0.032 5.780480e-17
## Dpp6 1.736230e-20 0.4273356 0.364 0.014 2.829533e-16
write.csv(Epithelial_cell_mouse_1, "DATA/bcg_ntm/Epithelial_cell_mouse_1.csv")
Fibroblasts_mouse_1 <- FindMarkers(ntm_bcg_mouse_1_analyzed, group.by = "ref.data", ident.1 = "Fibroblasts", min.pct = 0.25)
head(Fibroblasts_mouse_1, n = 10)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Gsn 2.129296e-08 0.2501452 0.988 0.959 0.0003470114
## Clec3b 1.243076e-06 0.3409435 0.958 0.922 0.0202584109
## Col3a1 3.726317e-06 0.2717886 0.994 0.989 0.0607277882
## Ighg2b 3.038943e-04 -0.3535009 0.427 0.522 1.0000000000
## Iglc1 3.444621e-04 -0.3950479 0.615 0.662 1.0000000000
## Cbr2 3.688341e-04 0.3383723 0.885 0.821 1.0000000000
## Ighj1 3.315223e-03 -0.3933206 0.939 0.951 1.0000000000
## Slpi 3.407754e-03 -0.3098203 0.939 0.960 1.0000000000
## Jchain 7.673227e-03 -0.3719164 0.991 0.981 1.0000000000
## Cyp2f2 1.836577e-02 0.3969943 0.997 0.980 1.0000000000
write.csv(Fibroblasts_mouse_1, "DATA/bcg_ntm/Fibroblasts_mouse_1.csv")
Macrophages_mouse_1 <- FindMarkers(ntm_bcg_mouse_1_analyzed, group.by = "ref.data", ident.1 = "Macrophages", min.pct = 0.25)
head(Macrophages_mouse_1, n = 10)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Inmt 5.947621e-120 -1.5346909 0.851 0.988 9.692838e-116
## Igfbp2 9.558494e-111 -1.4883978 0.881 0.988 1.557748e-106
## Tns1 6.016755e-110 -1.0762786 0.978 0.998 9.805506e-106
## Cldn5 1.427166e-108 -1.3438779 0.873 0.987 2.325852e-104
## Epas1 1.179950e-104 -1.1411513 0.923 0.991 1.922965e-100
## Cd74 5.804052e-99 0.4779757 1.000 1.000 9.458864e-95
## Pcolce2 1.269977e-98 -1.2735655 0.624 0.936 2.069682e-94
## Fmo2 2.253327e-98 -1.1674608 0.851 0.977 3.672247e-94
## C3 6.998009e-95 0.8316677 0.997 0.997 1.140465e-90
## Acer2 2.881571e-92 -1.0832747 0.892 0.983 4.696097e-88
write.csv(Macrophages_mouse_1, "DATA/bcg_ntm/Macrophages_mouse_1.csv")
B_cell_mouse_2 <- FindMarkers(ntm_bcg_mouse_2_analyzed, group.by = "ref.data", ident.1 = "B cells", min.pct = 0.25)
head(B_cell_mouse_2, n = 10)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Cr2 8.111841e-111 1.3335168 0.781 0.140 1.147339e-106
## Tnfrsf13c 6.610158e-109 1.5062338 0.870 0.215 9.349407e-105
## Cd19 1.798695e-100 1.5825104 0.932 0.293 2.544075e-96
## Cd79a 6.882185e-100 2.0061965 0.995 0.655 9.734163e-96
## Cxcr5 5.656108e-98 1.0245163 0.719 0.128 8.000000e-94
## Fcrla 9.070035e-93 0.9888835 0.797 0.175 1.282866e-88
## Cd79b 1.744343e-92 1.8567577 0.969 0.428 2.467198e-88
## Ighd 6.213102e-91 1.4257687 0.870 0.252 8.787811e-87
## Spib 7.115527e-90 1.3790276 0.906 0.291 1.006420e-85
## Fcrl5 9.235320e-89 0.9367736 0.615 0.097 1.306244e-84
write.csv(B_cell_mouse_2, "DATA/bcg_ntm/B_cell_mouse_2.csv")
DC_mouse_2 <- FindMarkers(ntm_bcg_mouse_2_analyzed, group.by = "ref.data", ident.1 = "DC", min.pct = 0.25)
head(DC_mouse_2, n = 10)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Fscn1 5.048929e-20 1.0065368 0.903 0.528 7.141206e-16
## Ager 2.407386e-18 -0.9210121 0.860 0.969 3.405007e-14
## Il12b 3.123756e-18 0.3339538 0.290 0.057 4.418241e-14
## Adam8 1.850423e-17 0.5687241 0.774 0.354 2.617239e-13
## Vegfa 1.453622e-16 -0.9902210 0.656 0.908 2.056003e-12
## Itk 1.617670e-16 0.5327627 0.763 0.354 2.288033e-12
## Trbc2 2.673189e-16 0.7156530 0.925 0.739 3.780958e-12
## Sftpc 5.873863e-16 -0.7422910 1.000 1.000 8.307992e-12
## Timp3 6.005500e-16 -0.9851887 0.828 0.947 8.494179e-12
## Tns1 9.956396e-16 -1.0641873 0.753 0.904 1.408233e-11
write.csv(DC_mouse_2, "DATA/bcg_ntm/DC_mouse_2.csv")
Stromal_cells_mouse_2 <- FindMarkers(ntm_bcg_mouse_2_analyzed, group.by = "ref.data", ident.1 = "Stromal cells", min.pct = 0.25)
head(Stromal_cells_mouse_2, n = 10)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Eln 1.044974e-17 1.0589486 0.907 0.642 1.478012e-13
## Myh11 1.458198e-15 1.0955090 0.573 0.222 2.062476e-11
## Gsn 7.872851e-15 0.7161536 1.000 0.872 1.113536e-10
## Ltbp4 1.257061e-14 0.9702758 0.867 0.635 1.777987e-10
## Fmo2 2.340566e-13 0.7548595 0.973 0.737 3.310496e-09
## Myl9 5.182858e-13 0.7656431 0.480 0.177 7.330634e-09
## Clec3b 1.571603e-12 0.9195858 0.893 0.706 2.222876e-08
## Cst3 3.865944e-12 0.4678424 1.000 1.000 5.467991e-08
## Ppp1r14a 6.776818e-12 0.6279264 0.947 0.737 9.585131e-08
## Inmt 1.781123e-11 0.7479552 1.000 0.655 2.519221e-07
write.csv(Stromal_cells_mouse_2, "DATA/bcg_ntm/Stromal_cells_mouse_2.csv")
Endothelial_cells_mouse_2 <- FindMarkers(ntm_bcg_mouse_2_analyzed, group.by = "ref.data", ident.1 = "Endothelial cells", min.pct = 0.25)
head(Endothelial_cells_mouse_2, n = 10)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Igfbp2 1.277601e-126 1.4188105 0.953 0.598 1.807039e-122
## Cldn5 2.129545e-124 1.2155326 0.981 0.678 3.012029e-120
## Epas1 1.746221e-115 1.1141478 0.991 0.841 2.469855e-111
## Tns1 1.282172e-112 0.9788669 0.998 0.854 1.813504e-108
## Inmt 6.138493e-109 1.1670392 0.941 0.553 8.682285e-105
## Pakap.1 2.127892e-97 0.8698975 0.988 0.868 3.009690e-93
## Ager 5.857310e-95 0.7197373 1.000 0.949 8.284579e-91
## Vegfa 1.347840e-93 0.8909449 0.993 0.855 1.906384e-89
## Ace 2.240201e-93 0.9738672 0.920 0.605 3.168540e-89
## Spock2 4.833370e-91 0.9284602 0.913 0.549 6.836318e-87
write.csv(Endothelial_cells_mouse_2, "DATA/bcg_ntm/Endothelial_cells_mouse_2.csv")
Epithelial_cell_mouse_2 <- FindMarkers(ntm_bcg_mouse_2_analyzed, group.by = "ref.data", ident.1 = "Epithelial cells", min.pct = 0.25)
head(Epithelial_cell_mouse_2, n = 10)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Inmt 3.272768e-19 -1.6073218 0.27 0.690 4.629003e-15
## Igfbp2 7.948094e-18 -1.6933439 0.36 0.722 1.124178e-13
## Ckmt1 1.320948e-16 0.3963712 0.45 0.142 1.868349e-12
## Lcn2 1.411799e-16 0.8342200 0.96 0.842 1.996848e-12
## Fxyd3 2.430392e-16 0.3067940 0.27 0.057 3.437547e-12
## Cldn3 5.101704e-16 0.6185109 0.99 0.862 7.215849e-12
## Spint2 6.448865e-16 0.5746184 1.00 0.990 9.121275e-12
## Anxa8 2.003781e-15 0.4762841 0.37 0.108 2.834148e-11
## Tnfaip2 3.153039e-15 0.6345622 1.00 0.998 4.459659e-11
## Acer2 8.382696e-15 -0.9449319 0.68 0.850 1.185648e-10
write.csv(Epithelial_cell_mouse_2, "DATA/bcg_ntm/Epithelial_cell_mouse_2.csv")
Fibroblasts_mouse_2 <- FindMarkers(ntm_bcg_mouse_2_analyzed, group.by = "ref.data", ident.1 = "Fibroblasts", min.pct = 0.25)
head(Fibroblasts_mouse_2, n = 10)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Gsn 1.952403e-18 0.7394821 0.977 0.870 2.761479e-14
## Clec3b 5.949273e-16 0.7269997 0.870 0.702 8.414652e-12
## Cst3 6.636390e-15 0.4288308 1.000 1.000 9.386509e-11
## Ltbp4 1.739443e-14 0.5883409 0.863 0.628 2.460268e-10
## Selenop 1.122721e-13 0.4353445 0.992 0.943 1.587976e-09
## H2-DMb2 1.395579e-13 -0.5924872 0.985 0.999 1.973907e-09
## Gimap7 3.606865e-13 -0.6385908 0.198 0.536 5.101550e-09
## Ctsz 2.339172e-12 -0.4897233 0.985 0.990 3.308525e-08
## Inmt 7.760718e-12 0.5850922 0.931 0.649 1.097676e-07
## Tnfaip2 6.435971e-11 -0.5288026 1.000 0.998 9.103038e-07
write.csv(Fibroblasts_mouse_2, "DATA/bcg_ntm/Fibroblasts_mouse_2.csv")
Macrophages_mouse_2 <- FindMarkers(ntm_bcg_mouse_2_analyzed, group.by = "ref.data", ident.1 = "Macrophages", min.pct = 0.25)
head(Macrophages_mouse_2, n = 10)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Ctss 9.054290e-69 0.6053018 1.000 1.000 1.280639e-64
## Gbp2b 3.885999e-58 0.5941220 0.999 0.989 5.496357e-54
## Nos2 6.201747e-55 0.8442604 0.846 0.671 8.771750e-51
## Lgals3 6.395388e-47 0.5912143 0.997 0.976 9.045637e-43
## Fth1 6.493402e-47 0.3267162 1.000 1.000 9.184268e-43
## Bst2 1.449126e-46 0.4767256 0.990 0.981 2.049645e-42
## Psap 1.140185e-45 0.4178347 1.000 1.000 1.612678e-41
## Tnfaip2 3.070388e-44 0.5213674 0.999 0.997 4.342756e-40
## Ctsb 2.057596e-42 0.5245380 0.996 0.993 2.910263e-38
## Cldn18 2.526096e-38 0.4332956 0.995 0.954 3.572910e-34
write.csv(Macrophages_mouse_2, "DATA/bcg_ntm/Macrophages_mouse_2.csv")
B_cell_deseq_1 <- FindMarkers(ntm_bcg_mouse_1_analyzed, group.by = "ref.data", ident.1 = "B cells", test.use = "DESeq2")
write.csv(B_cell_deseq_1, "DATA/bcg_ntm/B_cell_deseq_mouse1.csv")
B_cell_deseq_2 <- FindMarkers(ntm_bcg_mouse_2_analyzed, group.by = "ref.data", ident.1 = "B cells", test.use = "DESeq2")
write.csv(B_cell_deseq_2, "DATA/bcg_ntm/B_cell_deseq_mouse2.csv")
organism = org.Mm.eg.db
# reading in data from deseq2
df = read.csv("DATA/bcg_ntm/B_cell_deseq_mouse1.csv", header=TRUE)
# we want the log2 fold change
original_gene_list <- df$avg_log2FC
# name the vector
names(original_gene_list) <- df$X
# omit any NA values
gene_list<-na.omit(original_gene_list)
# sort the list in decreasing order (required for clusterProfiler)
gene_list = sort(gene_list, decreasing = TRUE)
#gene set enrichment function for B cell cluster
gse <- gseGO(geneList=gene_list,
ont ="ALL",
keyType = "SYMBOL",
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.05,
verbose = TRUE,
OrgDb = organism,
pAdjustMethod = "none",
eps = 0.0,
nPermSimple = 10000)
dotplot(gse, showCategory=10, split=".sign") + facet_grid(.~.sign)
# Convert gene IDs for gse to KEGG function
# We will lose some genes here because not all IDs will be converted
ids<-bitr(names(gene_list), fromType = "SYMBOL", toType = "ENTREZID", OrgDb=organism)
# remove duplicate IDS (here I use "ENSEMBL", but it should be whatever was selected as keyType)
dedup_ids = ids[!duplicated(ids[c("SYMBOL")]),]
# Create a new dataframe df2 which has only the genes which were successfully mapped using the bitr function above
df2 = df[df$X %in% dedup_ids$SYMBOL,]
# Create a new column in df2 with the corresponding ENTREZ IDs
df2$Y = dedup_ids$ENTREZID
# Create a vector of the gene unuiverse
kegg_gene_list <- df2$avg_log2FC
# Name vector with ENTREZ ids
names(kegg_gene_list) <- df2$Y
# omit any NA values
kegg_gene_list<-na.omit(kegg_gene_list)
# sort the list in decreasing order (required for clusterProfiler)
kegg_gene_list = sort(kegg_gene_list, decreasing = TRUE)
kegg_organism = "mmu"
kk2 <- gseKEGG(geneList = kegg_gene_list,
organism = kegg_organism,
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.05,
pAdjustMethod = "none",
keyType = "ncbi-geneid",
eps = 0.0)
write.csv(kk2, "DATA/bcg_ntm/kk2.csv")
dotplot(kk2, showCategory=, split=".sign") + facet_grid(.~.sign)
library(pathview)
# Produce the native KEGG plot (PNG) for first set of genes
dme <- pathview(gene.data=kegg_gene_list, pathway.id="mmu05417", species = kegg_organism, low = list(gene = "#453781FF"), mid =
list(gene = "#20A387FF"), high = list(gene = "#FDE725FF"))
# Produce a different plot (PDF) (not displayed here)
dme <- pathview(gene.data=kegg_gene_list, pathway.id="mmu05417", species = kegg_organism, kegg.native = F, low = list(gene = "#453781FF"), mid =
list(gene = "#20A387FF"), high = list(gene = "#FDE725FF"))